%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% This file is part of the book
%%
%% Primes and Factorization
%% https://code.google.com/p/factor-book/
%%
%% Copyright (C) 2010 Minh Van Nguyen <nguyenminh2@gmail.com>
%%
%% See the file COPYING for copying conditions. See the file LICENSE
%% for the terms under which the whole book is licensed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\chapter{Unique Factorization}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Divisibility}

Denote the set of integers by\index{$\ZZ$}
\[
\ZZ
=
\{\dots, -3, -2, -1, 0, 1, 2, 3, \dots\}.
\]
Much of our exploration in number theory concerns the set of positive
integers. An important fact about the positive integers is expressed
by the
\emph{well-ordering principle}\index{well-ordering principle}. This
principle states that any nonempty set of positive integers
contains a least element.

\begin{lstlisting}
sage: ZZ
Integer Ring
sage: type(ZZ)
<type 'sage.rings.integer_ring.IntegerRing_class'>
sage: Integer  # Sage integer
<type 'sage.rings.integer.Integer'>
sage: int  # Python integer
<type 'int'>
\end{lstlisting}

\begin{theorem}
\label{thm:unique_factorization:well_ordering_principle}
\textbf{Well-ordering principle.}
\index{well-ordering principle}
Let $S$ be a nonempty set of positive integers. Then there is some
$a \in S$ such that $a \leq b$ for all $b \in S$.
\end{theorem}

A \emph{prime}\index{prime} is an integer $p > 1$ that cannot be
expressed as a product of positive integers other than 1 and $p$
itself. Otherwise, $p$ is said to be
\emph{composite}\index{composite}, i.e. it can be written as the
product of two integers greater than 1. A (prime)
\emph{factorization}\index{factorization} of an integer $n > 1$ is a
decomposition of $n$ into primes, written as
\[
n
=
p_1^{\alpha_1} p_2^{\alpha_2} \cdots p_r^{\alpha_r}
\]
where the $p_i$ are distinct primes and each $\alpha_i > 0$.

\begin{lstlisting}
sage: P = Primes(); P
Set of all prime numbers: 2, 3, 5, 7, ...
sage: p = P.first(); p
2
sage: p = P.next(p); p
3
sage: p = P.next(p); p
5
sage: p = P.next(p); p
7
sage: p = P.next(p); p
11
sage: list(primes(50))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
sage: prime_range(0, 50)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
sage: prime_range(50)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
sage: n = len(prime_range(50))
sage: primes_first_n(n)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
sage: p = 1
sage: p = next_prime(p); p
2
sage: p = next_prime(p); p
3
sage: p = next_prime(p); p
5
sage: p = next_prime(p); p
7
sage: p = next_prime(p); p
11
sage: map(is_prime, prime_range(40))
[True, True, True, True, True, True, True, True, True, True, True, True]
\end{lstlisting}

For any $a,n \in \ZZ$, we say that $a$ \emph{divides}\index{divide}
$n$ if $n = ab$ for some $b \in \ZZ$. The integer $a$ is also referred
to as a \emph{divisor}\index{divisor} of $n$, and that $n$ is a
\emph{multiple}\index{multiple} of $a$. If $a$ divides $n$, we
express this relationship as $a \divides n$\index{$\divides$}, and
similarly we write $a \notdivides n$\index{$\notdivides$} to mean that
$a$ does not divide $n$. We could also say that $a$ is a
\emph{factor}\index{factor} of $n$ or that $n$ is
\emph{divisible}\index{divisible} by $a$. There is no restriction on
whether $a$ must be positive, negative, or zero. Whenever we use the
notation $a \divides n$, it is understood that $a \neq 0$. A
\emph{prime divisor}\index{prime divisor} of $n$ is a prime $p$ such
that $p \divides n$.

\begin{lstlisting}
sage: a = 27; b = 3
sage: a.divides(b)
False
sage: b.divides(a)
True
sage: n = randint(-10^100, 10^100)
sage: p = random_prime(randint(0, 10^100))
sage: n = n * p
sage: n.divides(p)
False
sage: p.divides(n)
True
\end{lstlisting}

\begin{lemma}
\label{lem:unique_factorization:each_integer_greater_than_1_has_prime_divisor}
\index{prime divisor}
Every integer $n > 1$ has a prime divisor.
\end{lemma}

\begin{proof}
If $n$ is prime, then we are done since $n$ divides itself. Otherwise
$n = ab$ for two positive integers $a,b > 1$. If either $a$ or $b$ is
prime, then we have the result. Otherwise we continue on factorizing
the current divisors to obtain smaller and smaller divisors of
$n$. This process of factorizing must halt after a finite number of
steps because of finite descent\index{finite descent}, i.e. there is
no infinite sequence of descending positive integers.
\end{proof}

\begin{lstlisting}
sage: # First determine all the divisors, then find a prime divisor.
sage: n = randint(-10^50, 10^50)
sage: D = divisors(n)
sage: for d in D:
...       if is_prime(d):
...           print("n has a prime divisor")
...           break
n has a prime divisor
sage: # Or using the iter protocol.
sage: n = randint(-10^50, 10^50)
sage: for d in iter(divisors(n)):
...       if is_prime(d):
...           print("n has a prime divisor")
...           break
n has a prime divisor
\end{lstlisting}

\begin{lemma}
\index{infinitely many primes}
There are infinitely many primes.
\end{lemma}

\begin{proof}
Our proof is by contradiction. Suppose there are finitely many primes,
say $P = \{p_1, p_2, p_3, \dots, p_n\}$. Construct the integer
\[
N
=
p_1 p_2 p_3 \cdots p_n + 1.
\]
By
Lemma~\ref{lem:unique_factorization:each_integer_greater_than_1_has_prime_divisor},
$N$ has a prime divisor, say $p$. This prime $p$ cannot belong to $P$
since dividing $N$ by each element of $P$ gives a remainder of 1. Thus
$p$ is a prime not in the set $P$, which contradicts our assumption
that $P$ contains all the primes. Therefore there are infinitely many
primes.
\end{proof}

\begin{theorem}
\label{thm:unique_factorization:division_algorithm}
\textbf{Division algorithm.}
\index{division algorithm}
If $a,b$ are integers such that $b > 0$, then there exist unique
integers $q,r$ such that $a = bq + r$ and $0 \leq r < b$.
\end{theorem}

\begin{proof}
For the case of existence, consider the set
\[
S
=
\{a - bn \divides n \in \ZZ \text{ and } a - bn \geq 0 \}.
\]
We claim that $S$ is nonempty. Since $b \geq 1$, then $|a|b \geq |a|$
and
\[
a - (-|a|)b
=
a + |a|b \geq a + |a| \geq 0.
\]
Choosing $n = -|a|$, we see that $a - bn \in S$ and thus $S$ is not
empty. By the well-ordering\index{well-ordering principle}
principle~(Theorem~\ref{thm:unique_factorization:well_ordering_principle}),
$S$ has a least element, say, $r = a - bq$ for some $q \in \ZZ$. Now
$0 \leq r$ by definition of $S$. Suppose for contradiction that
$r \geq b$. Then
\[
0 \leq r - b = (a - bq) - b = a - b(q + 1)
\]
and $r - b \in S$ by definition. Thus $a - b(q + 1) = r - b < r$ so
that $r - b$ is an element of $S$ that is smaller than $r$, in
contradiction of our assumption that $r$ is the smallest element of
$S$. It follows that $r < b$ and we have found integers $r,q$
satisfying $a = bq + r$ and $0 \leq r < b$.

For the case of uniqueness, suppose we have integers $r^\prime$ and
$q^\prime$ that also satisfy $a = bq^{\prime} + r^{\prime}$. Then
$0 \leq r^\prime < b$ and
%
\begin{equation}
\label{eq:unique_factorization:two_remainders}
r - r^\prime
=
b(q^\prime - q).
\end{equation}
%
Multiply $0 \leq r^\prime < b$ through by $-1$ yields $0 \geq
-r^\prime > -b$ and consider the pair of inequalities
\[
\begin{array}{rcccl}
-b &<   & -r^\prime &\leq & 0, \\
0 &\leq & r         &<    & b.
\end{array}
\]
Add them together term by term gives $-b < r - r^\prime < b$.
Substituting~(\ref{eq:unique_factorization:two_remainders}) into the
last inequality and simplifying gives $-b < b(q^\prime - q) < b$,
which implies that $-1 < q^\prime - q < 1$. Since $q^\prime$ and $q$
are integers, then their difference is also an integer. The only
integer within the open interval $(-1, 1)$ is 0 and we must have
$q^\prime - q = 0$. Substituting this
into~(\ref{eq:unique_factorization:two_remainders}) gives
$r - r^\prime = 0$, thus $r = r^\prime$ and $q = q^\prime$. Therefore
$r$ and $q$ are unique.
\end{proof}

The remainder $r$ is also written as $r = a \bmod b$\index{$\bmod$}. The
operator $\bmod$ means, ``the remainder when $a$ is divided by $b$.''

\begin{lstlisting}
sage: a = 27; b = 4
sage: q = a // b; q
6
sage: r = a % b; r
3
sage: a == q*b + r
True
sage: q, r = divmod(a, b)
sage: a == q*b + r
True
sage: r = mod(a, b).lift()
sage: a == q*b + r
True
sage: q, r = a.quo_rem(b)
sage: a == q*b + r
True
sage: a = Integer(randint(-10^100, 10^100))
sage: b = Integer(randint(-10^100, 10^100))
sage: while b > a:
...       b = Integer(randint(-10^100, 10^100))
sage: q, r = a.quo_rem(b)
sage: a == q*b + r
True
\end{lstlisting}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{The Euclidean algorithm}

Let $a,b \in \ZZ$ such that at least one of $a$ or $b$ is different
from zero. The
\emph{greatest common divisor}\index{greatest common divisor} of $a$
and $b$, written $\gcd(a,b)$\index{$\gcd$}, is the positive integer
$d$ satisfying
%
\begin{enumerate}
\item $d \divides a$ and $d \divides b$

\item if $c \divides a$ and $c \divides b$ for some $c \in \ZZ$, then $c
  \leq d$.
\end{enumerate}
%
We extend this definition by setting $\gcd(0,0) = 0$. If
$\gcd(a,b) = 1$, we say that $a$ and $b$ are
\emph{relatively prime}\index{relatively prime} or
\emph{coprime}\index{coprime}.

\begin{lstlisting}
sage: gcd(42, 8)
2
sage: gcd(0, 0)
0
sage: # any two distinct primes are coprime
sage: p = random_prime(10^100)
sage: q = random_prime(10^100)
sage: while p == q:
...       q = random_prime(10^100)
sage: gcd(p, q)
1
sage: # if d = gcd(a,b), then d divides both a and b
sage: a = Integer(randint(-10^100, 10^100))
sage: b = Integer(randint(-10^100, 10^100))
sage: while a == b:
...       b = Integer(randint(-10^100, 10^100))
sage: d = gcd(a, b)
sage: d.divides(a); d.divides(b)
True
True
\end{lstlisting}

\begin{theorem}
\label{thm:unique_factorization:divides_any_two_terms_then_divides_third}
For all $a,b,c \in \ZZ$, if $n$ divides any two of $a,b,c$ in
the equation $a = b + c$, then $n$ divides the third.
\end{theorem}

\begin{proof}
Let $a,b,c$ be integers satisfying the equation $a = b + c$. If
$n$ is an integer that divides any two terms in the latter equation, then
we distinguish three separate cases.
%
\begin{enumerate}
\item If $n \divides a$ and $n \divides b$, then by definition $a = nx$ and
  $b = ny$ for some $x,y \in \ZZ$. Substituting these into the
  equation and simplifying gives $nx = (ny) + c$, which implies that
  $c = n(x - y)$ and hence $n \divides c$.

\item If $n \divides a$ and $n \divides c$, we use an argument similar to
  the above.

\item If $n \divides b$ and $n \divides c$, apply the same argument as in
  the first case.
\end{enumerate}
%
In each of these three cases, we see that whenever $n$ divides any
two terms in the equation $a = b + c$, then $n$ also divides the
third term.
\end{proof}

\begin{lstlisting}
sage: a = 42; b = 8
sage: q, r = a.quo_rem(b); q; r
5
2
sage: gcd(a, b); gcd(b, r)
2
2
sage: a = Integer(randint(-10^100, 10^100))
sage: b = Integer(randint(-10^100, 10^100))
sage: while a == b:
...       b = Integer(randint(-10^100, 10^100))
sage: q, r = a.quo_rem(b)
sage: gcd(a, b) == gcd(b, r)
True
\end{lstlisting}

\begin{lemma}
\label{lem:unique_factorization:gcd(a,b)_eq_gcd(b,r)}
Let $a,b,q,r \in \ZZ$ with $b > 0$ and $0 \leq r < b$ such that
$a = bq + r$. Then $\gcd(a,b) = \gcd(b,r)$.
\end{lemma}

\begin{proof}
Let $a,b,q,r \in \ZZ$ such that $a = bq + r$ where $b > 0$ and
$0 \leq r < b$. Let $d = \gcd(a,b)$, then it clearly follows that
$d \divides bq$ as well. By
Theorem~\ref{thm:unique_factorization:divides_any_two_terms_then_divides_third},
$d$ also divides $r$ as required.
\end{proof}

The GCD of two integers can be found by listing all their positive
divisors and choosing the largest one common to both. This method is
cumbersome for large integers. In the seventh book of the
\emph{Elements}, Euclid presents an efficient method involving
repeated application of the division
algorithm~(Theorem~\ref{thm:unique_factorization:division_algorithm}),
called the Euclidean algorithm. Let $a,b \in \ZZ$. Since
$\gcd(|a|, |b|) = \gcd(a,b)$, there is no loss of generality in
assuming that $a \geq b > 0$. First, apply the division algorithm to
$a,b$ to get
\[
a
=
bq_1 + r_1, \qquad\qquad 0 \leq r_1 < b.
\]
If $r_1 = 0$, then $b \divides a$ and $\gcd(a,b) = b$. Otherwise,
$r_1 \neq 0$ so divide $b$ by $r_1$ to produce
$q_2, r_2 \in \ZZ$ satisfying
\[
b
=
r_1 q_2 + r_2, \qquad\qquad 0 \leq r_2 < r_1.
\]
If $r_2 = 0$, then we are done. Otherwise, we repeat the same process
to obtain
\[
r_1
=
r_2 q_3 + r_3, \qquad\qquad 0 \leq r_3 < r_2.
\]
We continue applying the division process until a zero remainder
appears, say, at the $(n+1)$-th stage where $r_{n-1}$ is divided by
$r_n$. Sooner or later, a zero remainder appears since the decreasing
sequence $b > r_1 > r_2 > \cdots \geq 0$ cannot contain more than $b$
integers. We then end up with the following system of equations:
%
\begin{align*}
a &= bq_1 + r_1, \qquad\qquad 0 < r_1 < b \\
b &= r_1 q_2 + r_2, \qquad\qquad 0 < r_2 < r_1 \\
r_1 &= r_2 q_3 + r_3, \qquad\qquad 0 < r_3 < r_2 \\
& \vdots \\
r_{n-2} &= r_{n-1} q_n + r_n, \qquad\qquad 0 < r_n < r_{n-1} \\
r_{n-1} &= r_n q_{n+1} + 0.
\end{align*}
%
The Euclidean algorithm states that $r_n$, the last nonzero remainder
which appears in this manner, is equal to $\gcd(a,b)$.

\begin{algorithm}[!htpb]
%% no semicolon at end of pseudocode statements
\dontprintsemicolon
%% data section
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
%% input/output
\Input{Two integers $a > 0$ and $b \geq 0$.}
\Output{The greatest common divisor of $a,b$.}
\BlankLine
%% algorithm body
$x \assign a$\;
$y \assign b$\;
\While{$y \neq 0$}{
  $r \assign x \mod y$\;
  $x \assign y$\;
  $y \assign r$\;
}
\Return $x$
\caption{The Euclidean algorithm.}
\index{Euclidean algorithm}
\label{alg:unique_factorization:Euclidean_algorithm}
\end{algorithm}

\begin{theorem}
\label{thm:unique_factorization:Euclidean_algorithm}
\textbf{Euclidean algorithm.}
\index{Euclidean algorithm}
Let $a$ and $b$ be positive integers with $a \geq b$. If $b \divides a$,
then $\gcd(a,b) = b$. If $b \notdivides a$, then apply the division
algorithm repeatedly as follows:
\[
\begin{array}{rclclccccrclcl}
a   &=& bq_0    &+& r_0 &&&& 0 &<&    r_0 &<& b \\
b   &=& r_0 q_1 &+& r_1 &&&& 0 &\leq& r_1 &<& r_0 \\
r_0 &=& r_1 q_2 &+& r_2 &&&& 0 &\leq& r_2 &<& r_1 \\
r_1 &=& r_2 q_3 &+& r_3 &&&& 0 &\leq& r_3 &<& r_2 \\
& \vdots
\end{array}
\]
This process ends when a remainder of $0$ is obtained. That is,
for some integer $t$ we have
\[
\begin{array}{rclclccccrclcl}
r_{t-2} &=& r_{t-1} q_t &+& r_t &&&& 0 &<& r_t &<& r_{t-1} \\
r_{t-1} &=& r_t q_{t+1} &+& 0
\end{array}
\]
Then $r_t = \gcd(a,b)$.
\end{theorem}

\begin{proof}
If $b \divides a$, then $a = bq + 0$ for some integer $q$. By
Lemma~\ref{lem:unique_factorization:gcd(a,b)_eq_gcd(b,r)}, we have
$\gcd(a,b) = \gcd(b,0) = b$ and we are done. In case
$b \notdivides a$, then repeated application of
Lemma~\ref{lem:unique_factorization:gcd(a,b)_eq_gcd(b,r)} to the list
of divisions yields
\[
\gcd(a,b)
=
\gcd(b, r_0)
=
\gcd(r_0, r_1)
=
\cdots
=
\gcd(r_t, 0)
=
r_t
\]
as required.
\end{proof}

The Euclidean algorithm can be directly translated into algorithmic
form as presented in
Algorithm~\ref{alg:unique_factorization:Euclidean_algorithm}.

\begin{lstlisting}
sage: def my_gcd(a, b):
...       # Return the GCD of and b.
...       #
...       # INPUT:
...       #
...       # - a -- integer > 0
...       # - b -- integer >= 0; a >= b
...       #
...       # OUTPUT:
...       #
...       # gcd(a,b)
...       x = a
...       y = b
...       while y != 0:
...           r = mod(x, y).lift()
...           x = y
...           y = r
...       return x
sage: my_gcd(42, 12)
6
sage: gcd(42, 12)
6
sage: a = Integer(randint(1, 10^200))
sage: b = Integer(randint(0, 10^200))
sage: while a < b:
...       b = Integer(randint(0, 10^200))
sage: my_gcd(a, b) == gcd(a, b)
True
sage: q, r = a.quo_rem(b)
sage: gcd(a, b) == gcd(b, r)
True
sage: my_gcd(a, b) == my_gcd(b, r)
True
sage: gcd(b, r) == my_gcd(b, r)
True
\end{lstlisting}

\begin{theorem}
\label{thm:unique_factorization:Bezout's_identity}
\textbf{B\'ezout's identity.}\index{B\'ezout's identity}
If $a,b$ are integers such that both are not zero, then there exist
integers $x,y$ such that $\gcd(a,b) = ax + by$.
\end{theorem}

\begin{proof}
Consider the set of linear combinations
\[
S
=
\{au + bv \divides u,v \in \ZZ\}.
\]
Let $c = am + bn$ be the smallest positive integer in $S$ for
some $m,n \in \ZZ$. Since $c > 0$, by the division
algorithm~(Theorem~\ref{thm:unique_factorization:division_algorithm})
there exist $q,r \in \ZZ$ such that $a = qc + r$, where
$0 \leq r < c$. The remainder $r$ can be expressed as $r = a - qc$
and, since $c = am + bn$, so we have $r = a - q(am+bn) = a(1-mq) - bnq$,
which implies that $r = a(1-mq) + b(-nq)$. By hypothesis, $0 \leq r < c$.
If $r \neq 0$, then $r$ is a positive integer such that $r < c$. But
then $r$ is expressible as a linear combination in terms of $a$ and
$b$ and hence $r \in S$ by definition of $S$. Here lies a
contradiction because $c \in S$ is by assumption the smallest positive
integer expressible as a linear combination of $a$ and $b$. Hence
$r = 0$. Since $a = qc + r$ and $r = 0$, then $a = qc$ and hence
$c \divides a$. A similar argument shows that $c \divides b$.

We have shown that with $c = am + bn$, then $c$ divides each of $a$
and $b$. We now demonstrate that $c = \gcd(a,b)$. Let $d = \gcd(a,b)$
and so by definition we have $d \divides a$ and $d \divides b$, hence
by
Theorem~\ref{thm:unique_factorization:divides_any_two_terms_then_divides_third}
we have $d \,|\, c$ and by definition $c = dz$ for some integer
$z$. Since $c$ and $d$ are both positive by
assumption, $z$ must be a natural number. If $z > 1$ then $c$ is a
common divisor of both $a$ and $b$ such that $c > d$, in
contradiction of our assumption that $d = \gcd(a,b)$. Thus $z = 1$
and it follows that $c = d$. Therefore $\gcd(a,b) = ax + by$ for
some $x,y \in \ZZ$.
\end{proof}

To compute the integers $x,y$ guaranteed by B\'ezout's identity, we
extend the Euclidean algorithm as follows. Set up a table with five
columns:
\[
\begin{array}{rcccc}
  i  & q & r & u & v \\\hline
  0  &   & m & 1 & 0 \\
  1  &   & n & 0 & 1
\end{array}
\]
To fill in the whole table, for each row $i \geq 2$ define
\[
q_i
=
\left\lfloor
\frac {r_{i-2}} {r_{i-1}}
\right\rfloor
\]
where the symbol $\lfloor x \rfloor$ denotes the integer part of $x$.
Next, define
%
\begin{align*}
r_i &= r_{i-2} - r_{i-1} q_i \\
u_i &= u_{i-2} - u_{i-1} q_i \\
v_i &= v_{i-2} - v_{i-1} q_i
\end{align*}
%
and stop when we obtain $r_k = 0$ for some $k \in \ZZ$, at which point
we get
%
\begin{align*}
s &= u_{k-1}, \\
t &= v_{k-1}.
\end{align*}
%
Our discussion is summarized in
Algorithm~\ref{alg:unique_factorization:extended_Euclidean_algorithm},
called the
\emph{extended Euclidean algorithm}\index{extended Euclidean algorithm}.

\begin{algorithm}[!htpb]
%% no semicolon at end of pseudocode statements
\dontprintsemicolon
%% data section
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
%% algorithm body
\Input{Integers $a$ and $b$, both not zero.}
\Output{Integers $s$ and $t$ such that $\gcd(a,b) = as + bt$.}
\BlankLine
$r_0 \assign a$;\quad $u_0 \assign 1$;\quad $v_0 \assign 0$\;
$r_1 \assign b$;\quad $u_1 \assign 0$;\quad $v_1 \assign 1$\;
$i \assign 1$\;
\While{$r_i \neq 0$}{
  $i \assign i + 1$\;
  $q_i \assign \left\lfloor \frac {r_{i-2}} {r_{i-1}} \right\rfloor$\;
  $r_i \assign r_{i-2} - r_{i-1} q_i$\;
  $u_i \assign u_{i-2} - u_{i-1} q_i$\;
  $v_i \assign v_{i-2} - v_{i-1} q_i$\;
}
\Return $(u_{i-1},\, v_{i-1})$
\caption{The extended Euclidean algorithm.}
\label{alg:unique_factorization:extended_Euclidean_algorithm}
\index{extended Euclidean algorithm}
\end{algorithm}

The version of the extended Euclidean algorithm as presented in
Algorithm~\ref{alg:unique_factorization:extended_Euclidean_algorithm}
is inefficient in terms of memory requirement. The algorithm keeps
track of all the $q_i, r_i, u_i, v_i$ while it runs, whereas we only
need the last two computed values of $q_i, r_i, u_i, v_i$.
Algorithm~\ref{alg:unique_factorization:extended_GCD_memory_efficient}
presents a version of the extended Euclidean algorithm that is more
memory efficient than
Algorithm~\ref{alg:unique_factorization:extended_Euclidean_algorithm}.

\begin{algorithm}[!htbp]
%% no semicolon at end of pseudocode statements
\dontprintsemicolon
%% data section
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
%% algorithm body
\Input{Integers $a$ and $b$, both not zero.}
\Output{Integers $s$ and $t$ such that $\gcd(a,b) = as + bt$.}
\BlankLine
$r_1 \assign a$;\quad $r_2 \assign b$\;
$u_1 \assign 1$;\quad $u_2 \assign 0$\;
$v_1 \assign 0$;\quad $v_2 \assign 1$\;
\While{$r_2 \neq 0$}{
  $q \assign \left\lfloor \frac{r_1}{r_2} \right\rfloor$\;
  $t \assign r_2$;\quad $r_2 \assign r_1 - r_2 q$;\quad $r_1 \assign t$\;
  $t \assign u_2$;\quad $u_2 \assign u_1 - u_2 q$;\quad $u_1 \assign t$\;
  $t \assign v_2$;\quad $v_2 \assign v_1 - v_2 q$;\quad $v_1 \assign t$\;
}
\Return $(u_1,\, v_1)$
\caption{The extended Euclidean algorithm with less memory
  requirement.}
\label{alg:unique_factorization:extended_GCD_memory_efficient}
\index{extended Euclidean algorithm}
\end{algorithm}

\begin{lstlisting}
sage: def my_xgcd(a, b):
...       # Returns s and t such that gcd(a,b) = as + bt.
...       #
...       # INPUT:
...       #
...       # - a -- integer
...       # - b -- integer, both a and b cannot be zero
...       #
...       # OUTPUT:
...       #
...       # Integers s,t such that gcd(a,b) = as + bt.
...       r1 = a; r2 = b
...       u1 = 1; u2 = 0
...       v1 = 0; v2 = 1
...       while r2 != 0:
...           q = floor(r1 / r2)
...           t = r2; r2 = r1 - r2*q; r1 = t
...           t = u2; u2 = u1 - u2*q; u1 = t
...           t = v2; v2 = v1 - v2*q; v1 = t
...       return (u1, v1)
sage: D, S, T = xgcd(27, 12)
sage: s, t = my_xgcd(27, 12)
sage: S == s; T == t
True
True
sage: s; t
1
-2
sage: s, t = my_xgcd(27, 12)
sage: gcd(27, 12)
3
sage: 27*s + 12*t
3
sage: a = Integer(randint(-10^200, 10^200))
sage: b = Integer(randint(-10^200, 10^200))
sage: while (a == 0) and (b == 0):
...       a = Integer(randint(-10^200, 10^200))
...       b = Integer(randint(-10^200, 10^200))
sage: D, S, T = xgcd(a, b)
sage: s, t = my_xgcd(a, b)
sage: S == s; T == t
True
True
sage: gcd(a, b) == D == a*s + b*t
True
\end{lstlisting}

\begin{corollary}
\label{cor:unique_factorization:prime_divisor_property}
\textbf{Prime divisor property.}
\index{prime divisor property}
Let $a,b$ be positive integers and let $p$ be prime. If $p \divides ab$,
then $p \divides a$ or $p \divides b$.
\end{corollary}

\begin{proof}
Let $p$ be prime and let $a,b > 1$ be integers such that
$p \divides ab$. If $p \divides a$, then we are done. Otherwise
$\gcd(p,a) = 1$ and we need to show that $p$ divides $b$. Since $p$
and $a$ are coprime, use B\'ezout's
identity~(Theorem~\ref{thm:unique_factorization:Bezout's_identity})
\index{B\'ezout's identity} to express them as the linear combination
$1 = am + pn$ for some $m,n \in \ZZ$. Multiplying both sides by $b$
gives $b = abm + bnp$. By assumption $p \divides ab$ and it is obvious
that $p \divides bp$. Hence $p$ divides the two terms $abm$ and $bnp$
in the linear combination $b = abm + bnp$. By
Theorem~\ref{thm:unique_factorization:divides_any_two_terms_then_divides_third},
it follows that $p$ divides $b$ as required.
\end{proof}

\begin{corollary}
\label{cor:unique_factorization:divides_product_then_divides_one_term}
If a prime $p$ divides the product $a_1 a_2 a_3 \cdots a_n$ for some
positive integer $n$, then $p \divides a_i$ for some integer
$1 \leq i \leq n$.
\end{corollary}

\begin{proof}
If $p \divides a_{n}$ then we are done. Otherwise it must be the case
that $p, a_n$ are coprime. Then repeated use of
Corollary~\ref{cor:unique_factorization:prime_divisor_property}
shows\index{prime divisor property} that $p$ divides one of
$a_1, a_2, a_3, \dots, a_{n-1}$.
\end{proof}

With necessary preparation out of the way, we now come to the
culmination of this chapter: the Fundamental Theorem of
Arithmetic\index{Fundamental Theorem of Arithmetic}. This theorem
asserts that any integer $n > 1$ can be factored into primes in
essentially one way. A prime factorization such as
$2 \cdot 3 \cdot 2$ is not considered as being different from
$2 \cdot 2 \cdot 3$.

\begin{theorem}
\label{thm:unique_factorization:Fundamental_Theorem_Arithmetic}
\textbf{Fundamental Theorem of Arithmetic.}
\index{Fundamental Theorem of Arithmetic}
\index{uniqueness of prime factorization}
Any integer $n > 1$ can be factored into a product of primes. This
representation is unique, apart from the order in which the factors
occur.
\end{theorem}

\begin{proof}
First, we establish the case of existence of a prime factorization for
each integer $n > 1$. By
Lemma~\ref{lem:unique_factorization:each_integer_greater_than_1_has_prime_divisor},
each integer $n > 1$ has a prime divisor. Repeated application of this
lemma on the divisors of $n$ yields a factorization of $n$ into a
product of primes.

For the case of uniqueness, suppose $n > 1$ is the smallest integer
that has two distinct prime factorizations
\[
n
=
\prod_{i=1}^s p_i
=
\prod_{j=1}^t q_j.
\]
By the definition of $n$, for all $1 \leq i \leq s$ we have
$p_i \neq q_j$ for any $1 \leq j \leq t$. Clearly $p_1$ divides
$\prod_{j=1}^t q_j$ and by
Corollary~\ref{cor:unique_factorization:divides_product_then_divides_one_term},
$p_1$ divides one of the terms $q_j$. This is a contradiction because
it implies that one of the primes $q_j$ is composite or equal to
$p_1$. Therefore, prime factorization is unique.
\end{proof}

Several of the primes that appear in the factorization of a given
positive integer may be repeated. By collecting like primes and
replacing them by a single factor, we could rephrase
Theorem~\ref{thm:unique_factorization:Fundamental_Theorem_Arithmetic}
as follows.

\begin{corollary}
Any positive integer $n > 1$ can be written uniquely in a
\emph{canonical form}\index{canonical form}
\[
n
=
p_1^{\alpha_1} p_2^{\alpha_2} \cdots p_r^{\alpha_r}
\]
where each $\alpha_i$ is a positive integer and each $p_i$ is a prime,
with $p_1 < p_2 < \cdots < p_r$.
\end{corollary}

\begin{lstlisting}
sage: F = factor(28); F
2^2 * 7
sage: type(F)
<class 'sage.structure.factorization.Factorization'>
sage: P = [p for p, _ in F]
sage: map(is_prime, P)
[True, True]
sage: F = [(p, e) for p, e in F]; F
[(2, 2), (7, 1)]
sage: 28 == prod(p^e for p, e in F)
True
sage: # do the same for a random integer
sage: n = randint(4, 10^50)
sage: F = factor(n)
sage: type(F)
<class 'sage.structure.factorization.Factorization'>
sage: P = [p for p, _ in F]
sage: all(map(is_prime, P))
True
sage: n == prod(p^e for p, e in F)
True
\end{lstlisting}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Trial division}
\index{trial division}

\begin{theorem}
\label{thm:unique_factorization:prime_divisor_leq_sqrt_n}
If $n > 1$ is composite\index{composite}, then $n$ has a prime divisor
$p$ such that $p \leq \sqrt{n}$.
\end{theorem}

\begin{proof}
In the canonical prime factorization of $n$, let $p$ be the least
prime divisor of $n$. If $n = st$ for some integers $s,t > 1$, then
$p \leq s$ and $p \leq t$. Multiply the last two inequalities
term-by-term to get $p^2 \leq st = n$ and therefore $p \leq \sqrt{n}$.
\end{proof}

\begin{corollary}
\label{cor:unique_factorization:n_prime_iff_not_divisible_by_prime_leq_sqrt_n}
An integer $p > 1$ is prime if and only if it is not divisible by any
prime $q$, where $1 < q \leq \sqrt{p}$.
\end{corollary}

\begin{proof}
Follows from the definition of prime and
Theorem~\ref{thm:unique_factorization:prime_divisor_leq_sqrt_n}.
\end{proof}

\begin{lstlisting}
sage: n = randint(2, 10^50)
sage: D = sorted(divisors(n))
sage: p = 2
sage: for d in D:
...       if is_prime(d):
...           p = d
...           break
sage: bool(p <= sqrt(n))
True
sage: # a simple primality test
sage: def my_primality(n):
...       P = primes_first_n(prime_pi(sqrt(n)))
...       if bool(P[-1] <= sqrt(n)):
...           if bool(next_prime(P[-1]) <= sqrt(n)):
...               P.append(next_prime(P[-1]))
...       for p in P:
...           if p.divides(n):
...               print("n is composite")
...               return
...       print("n is prime")
...       return
sage: # first test that a composite is not prime
sage: n = randint(2, 10^10)
sage: while is_prime(n):
...       n = randint(2, 10^10)
sage: is_prime(n)
False
sage: my_primality(n)
n is composite
sage: # now test that a prime is indeed prime
sage: n = randint(2, 10^10)
sage: while not is_prime(n):
...       n = randint(2, 10^10)
sage: is_prime(n)
True
sage: my_primality(n)
n is prime
\end{lstlisting}

\begin{algorithm}[!htpb]
%% no semicolon at end of pseudocode statements
\dontprintsemicolon
%% data section
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
%% input/output
\Input{An integer $n > 3$.}
\Output{A list of all primes $\leq n$.}
\BlankLine
%% algorithm body
$A \assign [0, 1, 2, \dots, n]$ \tcc*[f]{elements of $A$ indexed as $a_i$}\;
$j \assign 2$\;
\While{$j^2 \leq n$ \nllabel{alg:Eratosthenes:all_integers_leq_sqrt_n}}{
  \If{$a_j \neq 0$ \nllabel{alg:Eratosthenes:primality_test}}{
    $i \assign 2$ \nllabel{alg:Eratosthenes:eliminate_multiples_start}\;
    \While{$ij \leq n$}{
      $a_{i \times j} \assign 0$\;
      $i \assign i + 1$ \nllabel{alg:Eratosthenes:eliminate_multiples_end}\;
    }
  }
  $j \assign j + 1$ \nllabel{alg:Eratosthenes:increment_j}\;
}
$P \assign [\,]$ \nllabel{alg:Eratosthenes:get_primes_start} \tcc*[f]{list of primes}\;
\For{$i \assign 2, \dots, n$}{
  \If{$a_i \neq 0$}{
    append $a_i$ to $P$\;
  }
}
\Return $P$ \nllabel{alg:Eratosthenes:get_primes_end}
\caption{The Sieve of Eratosthenes.}
\index{Sieve of Eratosthenes}
\label{alg:unique_factorization:sieve_eratosthenes}
\end{algorithm}

Eratosthenes used
Corollary~\ref{cor:unique_factorization:n_prime_iff_not_divisible_by_prime_leq_sqrt_n}
as the basis of a clever technique, called the
\emph{Sieve of Eratosthenes}\index{Sieve of Eratosthenes}, for finding
all primes below a given integer $n > 1$. Start by listing all the
integers from 2 to $n$. Then eliminate all the composite numbers by
striking out all multiples $2p, 3p, 4p, 5p, \dots$ of the primes
$p \leq \sqrt{n}$. The remaining integers on the list are primes.

The Sieve of Eratosthenes\index{Sieve of Eratosthenes} is summarized
in Algorithm~\ref{alg:unique_factorization:sieve_eratosthenes}. The
loop starting from line~\ref{alg:Eratosthenes:all_integers_leq_sqrt_n}
considers all integers $\leq \sqrt{n}$. Its worst-case runtime is
$O(\sqrt{n})$. Line~\ref{alg:Eratosthenes:primality_test} is a
primality test. If $a_j \neq 0$, then $a_j$ is
prime. Lines~\ref{alg:Eratosthenes:eliminate_multiples_start}
to~\ref{alg:Eratosthenes:eliminate_multiples_end} eliminate all
multiples of the prime $a_j$, with runtime $O(\sqrt{n})$. Any multiple
of $a_j$ is set to $0$. In total,
lines~\ref{alg:Eratosthenes:all_integers_leq_sqrt_n}
to~\ref{alg:Eratosthenes:increment_j} has a runtime of $O(\sqrt{n}
\times \sqrt{n}) = O(n)$. Lines~\ref{alg:Eratosthenes:get_primes_start}
to~\ref{alg:Eratosthenes:get_primes_end} fetch all the primes
remaining in the list $A$. The loop in this fetching step has runtime
$O(n)$. In total, the worst-case runtime of
Algorithm~\ref{alg:unique_factorization:sieve_eratosthenes} is
$O(n + n) = O(2n)$.

\begin{lstlisting}
sage: def eratosthenes(n):
...       # The Sieve of Eratosthenes.
...       A = [0..n]
...       j = 2
...       while j^2 <= n:
...           if A[j] != 0:
...               i = 2
...               while i*j <= n:
...                   A[i*j] = 0
...                   i += 1
...           j += 1
...       P = []
...       for i in range(2, n+1):
...           if A[i] != 0:
...               P.append(A[i])
...       return P
sage: eratosthenes(50)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
sage: n = randint(2, 10^6)
sage: P = eratosthenes(n)
sage: all(map(is_prime, P))
True
sage: all(map(is_prime, prime_range(n)))
True
sage: P == prime_range(n)
True
\end{lstlisting}

The Fundamental Theorem of
Arithmetic~(Theorem~\ref{thm:unique_factorization:Fundamental_Theorem_Arithmetic})
guarantees that each integer $n > 1$ can be uniquely factored into
primes. In practice, we can obtain all the prime factors of $n$ by a
combination of trial division and the Sieve of
Eratosthenes~(Algorithm~\ref{alg:unique_factorization:sieve_eratosthenes}). Use
the Sieve of Eratosthenes to produce a list of all primes
$\leq \sqrt{n}$ and the least prime $\geq \sqrt{n}$. That is,
\[
D
=
[d_0, d_1, d_2, \dots, d_{k-1}, d_k]
\]
where $d_0, \dots, d_{k-1}$ are all primes $\leq \sqrt{n}$, $d_k$ is
the least prime $\geq \sqrt{n}$, and
\[
d_0 < d_1 < d_2 < \cdots < d_k.
\]
The list $D$ is called the list of \emph{candidate prime divisors}
\index{candidate prime divisors} of $n$. If $n$ is prime, then by
Corollary~\ref{cor:unique_factorization:n_prime_iff_not_divisible_by_prime_leq_sqrt_n}
none of the $d_i$ is a factor of $n$. If any $d_i$ divides $n$, then
repeatedly divide $n$ by $d_i$ as many times as possible. The
procedure is summarized in
Algorithm~\ref{alg:unique_factorization:trial_division}.
{\color{red}{This algorithm is wrong; rework it!}}

\begin{algorithm}[!htpb]
%% no semicolon at end of pseudocode statements
\dontprintsemicolon
%% data section
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
%% input/output
\Input{An integer $n > 1$. A list
  $D = [d_0, d_1, d_2, \dots, d_{k-1}, d_k]$ of candidate prime
  divisors of $n$.}
\Output{A prime factorization of $n$.}
\BlankLine
%% algorithm body
$N \assign n$\;
$P \assign [\,]$ \tcc*[f]{list of prime factors of $n$}\;
\For{$i \assign 0,\dots,k$}{
  \While{$\gcd(d_i, N) > 1$}{
    append $d_i$ to $P$\;
    $N \assign N / d_i$\;
  }
}
\If{$\length(P) = 0$}{
  \Return $n$\;
}
\Return $P$
\caption{Prime factorization by trial division.}
\index{trial division}
\label{alg:unique_factorization:trial_division}
\end{algorithm}
